Spatial interpolation

The contents will appear later

This section will contain hands-on material that will appear later.

import geopandas as gpd
import tobler
import pyinterpolate
import numpy as np

from libpysal import graph
from sklearn import neighbors
from scipy import interpolate

Area interpolation

simd = gpd.read_file(
    "https://martinfleischmann.net/sds/chapter_09/data/edinburgh_simd_2020.gpkg"
)
simd.head(2)
DataZone DZName LAName SAPE2017 WAPE2017 Rankv2 Quintilev2 Decilev2 Vigintilv2 Percentv2 ... CrimeRate CrimeRank HouseNumOC HouseNumNC HouseOCrat HouseNCrat HouseRank Shape_Leng Shape_Area geometry
0 S01008417 Balerno and Bonnington Village - 01 City of Edinburgh 708 397 5537 4 8 16 80 ... 86 5392.0 17 8 2% 1% 6350.0 20191.721420 1.029993e+07 POLYGON ((315157.369 666212.846, 315173.727 66...
1 S01008418 Balerno and Bonnington Village - 02 City of Edinburgh 691 378 6119 5 9 18 88 ... 103 5063.0 7 10 1% 1% 6650.0 25944.861787 2.357050e+07 POLYGON ((317816.000 666579.000, 318243.000 66...

2 rows × 52 columns

simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
grid_8 = tobler.util.h3fy(simd, resolution=8)
grid_8.head(2)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
geometry
hex_id
8819727687fffff POLYGON ((324950.965 668053.612, 324487.174 66...
8819727737fffff POLYGON ((316052.636 673439.288, 315588.440 67...
m = simd.boundary.explore(tiles="CartoDB Positron")
grid_8.boundary.explore(m=m, color="red")
Make this Notebook Trusted to load map: File -> Trust Notebook

overlay

interpolated = tobler.area_weighted.area_interpolate(
    simd,
    grid_8,
    extensive_variables=["EmpNumDep", "IncNumDep"],
    intensive_variables=["EmpRate", "IncRate"],
)
interpolated.explore("EmpRate", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook

Point interpolation

airbnb = gpd.read_file(
    "https://martinfleischmann.net/sds/chapter_09/data/edinburgh_airbnb_2023.gpkg"
)
airbnb.head()
id bedrooms property_type price geometry
0 15420 1.0 Entire rental unit $126.00 POINT (325921.137 674478.931)
1 790170 2.0 Entire condo $269.00 POINT (325976.360 677655.252)
2 24288 2.0 Entire loft $95.00 POINT (326069.186 673072.913)
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683)
4 822829 3.0 Entire rental unit $361.00 POINT (325691.831 674328.127)
airbnb.price.head()
0    $126.00
1    $269.00
2     $95.00
3    $172.00
4    $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)
two_bed_homes = airbnb[
    (airbnb["bedrooms"] == 2)
    & (airbnb["property_type"] == "Entire rental unit")
    & (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()
id bedrooms property_type price geometry price_float
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0
two_bed_homes.geometry.duplicated().any()
True
two_bed_homes = two_bed_homes.drop_duplicates("geometry")
two_bed_homes.explore("price_float", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
knn = graph.Graph.build_kernel(two_bed_homes, k=10).transform("r")
two_bed_homes["price_lag"] = knn.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")
Make this Notebook Trusted to load map: File -> Trust Notebook

Nearest

grid_10 = tobler.util.h3fy(simd, resolution=10)
len(grid_10)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
20162
grid_coordinates = grid_10.centroid.get_coordinates()
grid_coordinates.head()
x y
hex_id
8a1972776097fff 315376.688774 677524.030454
8a197276a9a7fff 323580.442913 668992.710554
8a1972395307fff 331857.440516 672045.094490
8a1972748d77fff 321595.016956 664900.226769
8a19727768a7fff 316116.831850 678185.581115
coords = two_bed_homes.get_coordinates()
coords.head()
x y
3 326748.645636 674001.683211
5 324950.723888 673875.598033
6 326493.725178 672853.903917
10 326597.123684 673869.551295
17 325393.807222 674177.408961
nearest = interpolate.griddata(
    coords,
    two_bed_homes.price_lag,
    grid_coordinates,
    method="nearest",
)
nearest
array([106.14780144, 160.        , 156.02793396, ..., 188.70398583,
       188.70398583,  84.        ])
grid_10["nearest"] = nearest
_ = grid_10.plot('nearest', legend=True)

caption here
ax = grid_10.plot('nearest', legend=True)
two_bed_homes.plot(ax=ax, color="red", markersize=1)
<Axes: >

caption here

K-Nearest neighbours regression

Uniform

interpolation_uniform = neighbors.KNeighborsRegressor(n_neighbors=10, weights="uniform")
interpolation_uniform.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
price_on_grid = interpolation_uniform.predict(grid_coordinates)
price_on_grid
array([120.51720211, 130.39712302, 173.18110465, ..., 120.36697169,
       120.36697169, 120.36697169])
grid_10["knn_uniform"] = price_on_grid
_ = grid_10.plot("knn_uniform", legend=True)

caption here

Distance-weighted

interpolation_distance = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")
interpolation_distance.fit(
    coords, two_bed_homes.price_lag
)
KNeighborsRegressor(n_neighbors=10, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["knn_distance"] = interpolation_distance.predict(grid_coordinates)
_ = grid_10.plot("knn_distance", legend=True)

caption here

Distance band regression

interpolation_radius = neighbors.RadiusNeighborsRegressor(radius=1000, weights="distance")
interpolation_radius.fit(
    coords, two_bed_homes.price_lag
)
RadiusNeighborsRegressor(radius=1000, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_10["radius"] = interpolation_radius.predict(grid_coordinates)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/sklearn/neighbors/_regression.py:500: UserWarning: One or more samples have no neighbors within specified radius; predicting NaN.
  warnings.warn(empty_warning_msg)
_ = grid_10.plot("radius", legend=True, missing_kwds={'color': 'lightgrey'})

caption here

Ordinary Kriging

input_data = np.hstack([coords, two_bed_homes.price_lag.values.reshape(-1, 1)])
input_data
array([[3.26748646e+05, 6.74001683e+05, 2.07033397e+02],
       [3.24950724e+05, 6.73875598e+05, 1.54805935e+02],
       [3.26493725e+05, 6.72853904e+05, 1.43865293e+02],
       ...,
       [3.28513265e+05, 6.74048892e+05, 1.06875409e+02],
       [3.26840903e+05, 6.74767224e+05, 1.68848108e+02],
       [3.25415664e+05, 6.73345158e+05, 2.15847334e+02]])
step_radius = 100  # meters
max_range = 5000  # meters

exp_semivar = pyinterpolate.build_experimental_variogram(
    input_array=input_data,
    step_size=step_radius,
    max_range=max_range,
)
exp_semivar.plot()

caption here
semivar = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=5000
)
semivar.plot()

caption here
ordinary_kriging = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ordinary_kriging"] = ordinary_kriging[:, 0]
_ = grid_10.plot("ordinary_kriging", legend=True)

caption here
grid_10["variance_error"] = ordinary_kriging[:, 1]
_ = grid_10.plot("variance_error", legend=True)

caption here

Use larger distance

semivar_larger = pyinterpolate.build_theoretical_variogram(
    experimental_variogram=exp_semivar,
    model_name='linear',
    sill=exp_semivar.variance,
    rang=15000,
)
ordinary_kriging_l = pyinterpolate.kriging(
    observations=input_data,
    theoretical_model=semivar_larger,
    points=grid_coordinates.values,
    no_neighbors=10,
    how="ok",
    show_progress_bar=False,
)
grid_10["ok_larger"] = ordinary_kriging_l[:, 0]
_ = grid_10.plot("ok_larger", legend=True)

caption here
grid_10["variance_error_l"] = ordinary_kriging_l[:, 1]
_ = grid_10.plot("variance_error_l", legend=True)

caption here